Reran on Sep 15 (2025) by Sagy - removing batch10 and WT stress (since we are using NIH for WT untreated vs WT stress) and CD41
import os
import sys
# Root of the NOVA checkout; the image data currently lives under the same root.
NOVA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
NOVA_DATA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'

# Publish the root through the environment and put it on sys.path so the
# project's `tools` package can be imported further down.
os.environ['NOVA_HOME'] = NOVA_HOME
sys.path.insert(1, NOVA_HOME)
print(f"NOVA_HOME: {os.getenv('NOVA_HOME')}")

# Shared prefixes for the image inputs and the preprocessing outputs.
_images_dir = os.path.join(NOVA_DATA_HOME, 'input', 'images')
_preprocessing_out = os.path.join(NOVA_HOME, 'outputs', 'preprocessing',
                                  'ManuscriptFinalData_80pct', 'neuronsDay8_new')

root_directory_raw = os.path.join(_images_dir, 'raw', 'OPERA_indi_sorted')
root_directory_proc = os.path.join(_images_dir, 'processed',
                                   'ManuscriptFinalData_80pct', 'neuronsDay8_new')
LOGS_PATH = os.path.join(_preprocessing_out, 'logs')
PLOT_PATH = os.path.join(_preprocessing_out, 'QC_figures')
print(os.environ['NOVA_HOME'])
import pandas as pd
import contextlib
import io
from IPython.display import display, Javascript
from tools.preprocessing_tools.qc_reports.qc_utils import log_files_qc, run_validate_folder_structure, display_diff, sample_and_calc_variance, \
show_site_survival_dapi_brenner, show_site_survival_dapi_cellpose, \
show_site_survival_dapi_tiling, show_site_survival_target_brenner, \
calc_total_sums, plot_filtering_heatmap, show_total_sum_tables, \
plot_cell_count, plot_catplot, plot_hm_of_mean_cell_count_per_tile, \
run_calc_hist_new, show_total_valid_tiles_per_marker_and_batch
from tools.preprocessing_tools.qc_reports.qc_config import new_d8_panels, new_d8_markers, new_d8_marker_info, new_d8_cell_lines, new_d8_cell_lines_to_cond,\
new_d8_cell_lines_for_disp, new_d8_reps, new_d8_line_colors, new_d8_lines_order, new_d8_custom_palette,\
new_d8_expected_dapi_raw
%load_ext autoreload
%autoreload 2
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA /home/projects/hornsteinlab/Collaboration/NOVA
# Batches covered by this report (batch10 was dropped for this rerun).
batches = [
    'batch1',
    'batch2',
    'batch3',
    'batch7',
    'batch8',
    'batch9',
]
batches
['batch1', 'batch2', 'batch3', 'batch7', 'batch8', 'batch9']
# Load the preprocessing logs for the selected batches.
df = log_files_qc(LOGS_PATH, batches, filename_split='-', site_location=0)

# Drop everything excluded from this rerun in a single pass:
# stress-condition rows, the SNCA line, and any batch10 rows.
_keep = (
    (df.condition != 'stress')
    & (df.cell_line != 'SNCA')
    & (df.batch != 'batch10')
)
df = df[_keep]

# Split the remaining rows into the DAPI channel and all other (target) markers.
_is_dapi = df.marker == 'DAPI'
df_dapi = df[_is_dapi]
df_target = df[~_is_dapi]
reading logs of batch8 reading logs of batch3 reading logs of batch9 reading logs of batch10 reading logs of batch2 reading logs of batch1 reading logs of batch7 Total of 15 files were read. Before dup handeling (1147717, 21) After duplication removal #1: (1071227, 22) After duplication removal #2: (1071227, 22)
# Validate the raw-image folder tree for every selected batch; the result is
# kept in `raws` and later passed to display_diff together with `procs`.
# NOTE(review): the second positional argument (False here, True for the
# processed tree) presumably tells the helper whether the root holds processed
# data — confirm against run_validate_folder_structure.
raws = run_validate_folder_structure(root_directory_raw, False,
new_d8_panels,
new_d8_markers,
PLOT_PATH,
new_d8_marker_info,
new_d8_cell_lines_to_cond,
new_d8_reps,
new_d8_cell_lines_for_disp,
new_d8_expected_dapi_raw,
batches=batches,
expected_count=250,
check_antibody=False)
batch1 Folder structure is invalid. Missing 11 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelA /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelB /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelC /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelD /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelE /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelF /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelG /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelH /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelI /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelJ /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelL No bad files are found. Total Sites: 140000
======== batch2 Folder structure is invalid. Missing 2 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch2/WT/panelA/Untreated/rep2 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch2/SNCA No bad files are found. Total Sites: 139000
======== batch3 Folder structure is invalid. Missing 1 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch3/SNCA No bad files are found. Total Sites: 139997
======== batch7 Folder structure is valid. No bad files are found. Total Sites: 160000
======== batch8 Folder structure is valid. No bad files are found. Total Sites: 160000
======== batch9 Folder structure is valid. No bad files are found. Total Sites: 159996
======== ====================
# Validate the processed-image folder tree for every selected batch; the result
# is kept in `procs` and later passed to display_diff together with `raws`.
# NOTE(review): same configuration as the raw validation except for the root
# directory and the second positional argument (True here) — presumably a
# "processed" flag; confirm against run_validate_folder_structure.
procs = run_validate_folder_structure(root_directory_proc, True,
new_d8_panels,
new_d8_markers,
PLOT_PATH,
new_d8_marker_info,
new_d8_cell_lines_to_cond,
new_d8_reps,
new_d8_cell_lines_for_disp,
new_d8_expected_dapi_raw,
batches=batches,
expected_count=250,
check_antibody=False)
batch1 Folder structure is invalid. Missing 8 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/FUSHomozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/TDP43/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/TBK1/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/WT/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/FUSRevertant/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/OPTN/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/FUSHeterozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1/SNCA No bad files are found. Total Sites: 123380
======== batch2 Folder structure is invalid. Missing 8 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/FUSHomozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/TDP43/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/TBK1/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/WT/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/FUSRevertant/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/OPTN/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/FUSHeterozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2/SNCA No bad files are found. Total Sites: 123753
======== batch3 Folder structure is invalid. Missing 8 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/FUSHomozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/TDP43/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/TBK1/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/WT/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/FUSRevertant/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/OPTN/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/FUSHeterozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3/SNCA No bad files are found. Total Sites: 124029
======== batch7 Folder structure is invalid. Missing 8 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/FUSHomozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/TDP43/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/TBK1/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/WT/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/FUSRevertant/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/OPTN/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/FUSHeterozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch7/SNCA/Untreated/CD41 No bad files are found. Total Sites: 132800
======== batch8 Folder structure is invalid. Missing 8 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/FUSHomozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/TDP43/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/TBK1/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/WT/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/FUSRevertant/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/OPTN/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/FUSHeterozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch8/SNCA/Untreated/CD41 No bad files are found. Total Sites: 145664
======== batch9 Folder structure is invalid. Missing 8 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/FUSHomozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/TDP43/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/TBK1/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/WT/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/FUSRevertant/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/OPTN/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/FUSHeterozygous/Untreated/CD41 /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch9/SNCA/Untreated/CD41 No bad files are found. Total Sites: 130955
======== ====================
# Per batch, show how the raw validation result (`raws`) differs from the
# processed one (`procs`).
# NOTE(review): PLOT_PATH is presumably where the figures are saved — confirm.
display_diff(batches, raws, procs, PLOT_PATH)
batch1
======== batch2
======== batch3
======== batch7
======== batch8
======== batch9
========
# Report one variance figure per batch, computed by sample_and_calc_variance on
# a sample of the processed images.
for batch in batches:
    # Discard anything the helper writes to stdout so that only the one-line
    # summary printed below appears in the cell output.
    with contextlib.redirect_stdout(io.StringIO()):
        var = sample_and_calc_variance(root_directory_proc, batch,
                                       sample_size_per_markers=200, num_markers=30)
    # Printed outside the redirect, otherwise the summary would be swallowed too.
    print(f'{batch} var: ',var)
batch1 var: 0.05036356098714817 batch2 var: 0.05258486770781657 batch3 var: 0.05210547185371908 batch7 var: 0.050002735702223834 batch8 var: 0.05076600847216654 batch9 var: 0.050907968037621834
By order of filtering
Percentage out of the total sites
# Site survival after the DAPI Brenner filter (first filter in the chain).
# The returned value is passed on to the Cellpose survival step.
# NOTE(review): 'cell_line_cond' is a bare string ('SNCA') here but a list
# (['SNCA']) in the Cellpose and tiling calls — confirm that
# show_site_survival_dapi_brenner treats both forms the same.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(df_dapi,
batches,
new_d8_line_colors,
new_d8_panels,
new_d8_reps,
vmax=250,
to_ignore={'cell_line_cond':'SNCA','batch':['batch1','batch2','batch3']}
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses.
A site will be filtered out if Cellpose found 0 cells in it.
# Site survival after the Cellpose filter, computed on top of the Brenner
# result; the returned value is passed on to the tiling survival step.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(df_dapi,
batches,
dapi_filter_by_brenner,
new_d8_line_colors,
new_d8_panels,
new_d8_reps,
figsize=(7,5),
to_ignore={'cell_line_cond':['SNCA'],'batch':['batch1','batch2','batch3']}
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses.
A site will be filtered out if, after tiling, no tile contains at least one whole cell that Cellpose detected.
# Site survival after the tiling filter, computed on top of the Cellpose
# result; the returned value is passed on to the target-marker Brenner step.
# NOTE(review): unlike the Brenner and Cellpose calls, to_ignore lists all six
# batches here rather than only batch1-3 — confirm this is intentional.
dapi_filter_by_tiling = show_site_survival_dapi_tiling(df_dapi,
batches,
dapi_filter_by_cellpose,
new_d8_line_colors,
new_d8_panels,
new_d8_reps,
figsize=(7,5),
to_ignore={'cell_line_cond':['SNCA'],'batch':['batch1','batch2','batch3', 'batch7', 'batch8', 'batch9']}
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses (when they differ from the percentages).
# Last step of the filtering chain: site survival of the target (non-DAPI)
# markers after their Brenner filter, relative to the DAPI tiling result.
show_site_survival_target_brenner(df_dapi,
df_target,
dapi_filter_by_tiling,
new_d8_markers,
figsize=(7,8))
# Statistics handed to calc_total_sums.
stats = [
    'n_valid_tiles',
    'site_whole_cells_counts_sum',
    'site_cell_count',
    'site_cell_count_sum',
]
total_sum = calc_total_sums(df_target, df_dapi, stats, new_d8_markers)

# Grand total of valid tiles over every marker whose name does not contain 'CD41'.
_non_cd41 = ~total_sum.marker.str.contains('CD41', regex=True)
total_sum.loc[_non_cd41, 'n_valid_tiles'].sum()
6495286
## Total tiles in wt lines
# Valid tiles of the WT conditions only, CD41 markers excluded.
_non_cd41 = ~total_sum.marker.str.contains('CD41', regex=True)
_is_wt = total_sum.cell_line_cond.isin(['WT stress', 'WT Untreated'])
total_sum.loc[_non_cd41 & _is_wt, 'n_valid_tiles'].sum()
1037929
## Total tiles in untreated lines
_non_cd41 = ~total_sum.marker.str.contains('CD41', regex=True)
_line_cond = total_sum.cell_line_cond
# Keep every non-WT line, plus WT only in its untreated condition.
_untreated = ~_line_cond.str.contains('WT') | (_line_cond == 'WT Untreated')
total_sum.loc[_non_cd41 & _untreated, 'n_valid_tiles'].sum()
6495286
# Sum of site_whole_cells_counts_sum over the DAPI rows of total_sum.
total_sum.loc[total_sum.marker == 'DAPI', 'site_whole_cells_counts_sum'].sum()
1250175.0
# Sum of site_cell_count over the DAPI rows of total_sum.
total_sum.loc[total_sum.marker == 'DAPI', 'site_cell_count'].sum()
5723828.0
# Render the summary tables for total_sum: one per batch followed by one for
# all batches combined (see the tables in the cell output).
show_total_sum_tables(total_sum)
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch1 | ||||
| count | 5.740000e+02 | 574.000000 | 574.000000 | 5.740000e+02 |
| mean | 2.228162e+03 | 22.281620 | 1354.630662 | 5.722444e+03 |
| std | 7.139471e+02 | 7.139471 | 510.552754 | 1.816201e+03 |
| min | 4.700000e+02 | 4.700000 | 263.000000 | 1.131000e+03 |
| 25% | 1.658250e+03 | 16.582500 | 1026.000000 | 4.587000e+03 |
| 50% | 2.262000e+03 | 22.620000 | 1303.000000 | 5.642000e+03 |
| 75% | 2.762250e+03 | 27.622500 | 1710.500000 | 7.051000e+03 |
| max | 3.763000e+03 | 37.630000 | 2646.000000 | 1.027900e+04 |
| sum | 1.278965e+06 | NaN | 777558.000000 | 3.284683e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch2 | ||||
| count | 572.000000 | 572.000000 | 572.000000 | 5.720000e+02 |
| mean | 1581.059441 | 15.810594 | 849.428322 | 3.946177e+03 |
| std | 474.353423 | 4.743534 | 257.416220 | 1.051369e+03 |
| min | 183.000000 | 1.830000 | 67.000000 | 4.770000e+02 |
| 25% | 1269.250000 | 12.692500 | 672.000000 | 3.237000e+03 |
| 50% | 1658.500000 | 16.585000 | 878.500000 | 4.045000e+03 |
| 75% | 1926.000000 | 19.260000 | 1048.500000 | 4.695000e+03 |
| max | 2511.000000 | 25.110000 | 1670.000000 | 6.325000e+03 |
| sum | 904366.000000 | NaN | 485873.000000 | 2.257213e+06 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch3 | ||||
| count | 574.000000 | 574.000000 | 574.000000 | 5.740000e+02 |
| mean | 1651.623693 | 16.516237 | 862.040070 | 4.084611e+03 |
| std | 509.486609 | 5.094866 | 269.189087 | 1.110487e+03 |
| min | 337.000000 | 3.370000 | 163.000000 | 7.830000e+02 |
| 25% | 1309.250000 | 13.092500 | 690.000000 | 3.449500e+03 |
| 50% | 1719.500000 | 17.195000 | 860.000000 | 4.158500e+03 |
| 75% | 2003.000000 | 20.030000 | 1030.000000 | 4.891500e+03 |
| max | 2802.000000 | 28.020000 | 1696.000000 | 6.807000e+03 |
| sum | 948032.000000 | NaN | 494811.000000 | 2.344567e+06 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch7 | ||||
| count | 5.720000e+02 | 572.000000 | 572.000000 | 5.720000e+02 |
| mean | 1.976990e+03 | 19.769895 | 1256.765734 | 5.931505e+03 |
| std | 7.455038e+02 | 7.455038 | 459.165135 | 1.841517e+03 |
| min | 5.000000e+00 | 0.050000 | 2.000000 | 1.500000e+01 |
| 25% | 1.628500e+03 | 16.285000 | 1008.750000 | 5.004250e+03 |
| 50% | 2.131000e+03 | 21.310000 | 1236.500000 | 6.101500e+03 |
| 75% | 2.479250e+03 | 24.792500 | 1510.250000 | 6.924000e+03 |
| max | 3.571000e+03 | 35.710000 | 2472.000000 | 9.793000e+03 |
| sum | 1.130838e+06 | NaN | 718870.000000 | 3.392821e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch8 | ||||
| count | 5.740000e+02 | 574.000000 | 574.000000 | 5.740000e+02 |
| mean | 2.166226e+03 | 21.662265 | 1294.651568 | 5.999894e+03 |
| std | 6.896234e+02 | 6.896234 | 361.351227 | 1.397893e+03 |
| min | 1.900000e+02 | 1.900000 | 130.000000 | 4.830000e+02 |
| 25% | 1.799750e+03 | 17.997500 | 1049.750000 | 5.079500e+03 |
| 50% | 2.260500e+03 | 22.605000 | 1290.500000 | 6.085500e+03 |
| 75% | 2.679250e+03 | 26.792500 | 1579.000000 | 7.172000e+03 |
| max | 3.458000e+03 | 34.580000 | 2092.000000 | 9.124000e+03 |
| sum | 1.243414e+06 | NaN | 743130.000000 | 3.443939e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch9 | ||||
| count | 5.710000e+02 | 571.000000 | 571.000000 | 5.710000e+02 |
| mean | 2.058778e+03 | 20.587776 | 1236.798599 | 5.708704e+03 |
| std | 8.504084e+02 | 8.504084 | 458.074241 | 1.981558e+03 |
| min | 0.000000e+00 | 0.000000 | 8.000000 | 1.100000e+01 |
| 25% | 1.341500e+03 | 13.415000 | 975.500000 | 4.564500e+03 |
| 50% | 2.268000e+03 | 22.680000 | 1296.000000 | 6.112000e+03 |
| 75% | 2.734500e+03 | 27.345000 | 1570.000000 | 7.177000e+03 |
| max | 3.475000e+03 | 34.750000 | 2128.000000 | 9.442000e+03 |
| sum | 1.175562e+06 | NaN | 706212.000000 | 3.259670e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n valid tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| All batches | ||||
| count | 3.437000e+03 | 3437.000000 | 3.437000e+03 | 3.437000e+03 |
| mean | 1.943898e+03 | 19.438979 | 1.142407e+03 | 5.232148e+03 |
| std | 7.194740e+02 | 7.194740 | 4.479516e+02 | 1.798140e+03 |
| min | 0.000000e+00 | 0.000000 | 2.000000e+00 | 1.100000e+01 |
| 25% | 1.459000e+03 | 14.590000 | 8.190000e+02 | 3.991000e+03 |
| 50% | 1.995000e+03 | 19.950000 | 1.106000e+03 | 5.196000e+03 |
| 75% | 2.445000e+03 | 24.450000 | 1.422000e+03 | 6.541000e+03 |
| max | 3.763000e+03 | 37.630000 | 2.646000e+03 | 1.027900e+04 |
| sum | 6.681177e+06 | NaN | 3.926454e+06 | 1.798289e+07 |
| expected_count | 4.500000e+02 | 450.000000 | 4.500000e+02 | 4.500000e+02 |
For each batch, cell line, replicate and marker: total number of tiles
# Overview of valid-tile totals per marker and batch.
show_total_valid_tiles_per_marker_and_batch(total_sum, vmax=10000)
# Expose n_valid_tiles under the column name 'index' before plotting.
# NOTE(review): presumably plot_filtering_heatmap reads its values from a
# column called 'index' — confirm in qc_utils.
to_heatmap = total_sum.rename(columns={'n_valid_tiles':'index'})
plot_filtering_heatmap(to_heatmap,
extra_index='marker',
vmin=None, vmax=None,
xlabel = 'Total number of tiles',
show_sum=True,
figsize=(8,28))
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
For each batch, cell line, replicate and marker: total number of whole cells
# Same heatmap as for the tile totals, but fed with the whole-cell counts:
# site_whole_cells_counts_sum is exposed under the column name 'index'.
# NOTE(review): presumably plot_filtering_heatmap reads its values from a
# column called 'index' — confirm in qc_utils.
to_heatmap = total_sum.rename(columns={'site_whole_cells_counts_sum':'index'})
plot_filtering_heatmap(to_heatmap, #to_heatmap[to_heatmap.batch=='batch7'],
extra_index='marker',
vmin=None, vmax=None,
xlabel = 'Total number of whole cells',
show_sum=True,
figsize=(8,28))
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:394: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
# Only DAPI sites that kept at least one valid tile are plotted.
df_no_empty_sites = df_dapi[df_dapi.n_valid_tiles != 0]

# One bar plot per cell-count column, in this order; each entry is (column, figure title).
_cell_count_plots = (
    ('site_cell_count_sum', 'Cell Count Average per Site (from tiles)'),
    ('site_whole_cells_counts_sum', 'Whole Cell Count Average per Site'),
    ('site_cell_count', 'Cellpose Cell Count Average per Site'),
)
for _column, _title in _cell_count_plots:
    plot_cell_count(df_no_empty_sites,
                    new_d8_lines_order,
                    new_d8_custom_palette,
                    y=_column,
                    title=_title)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:621: UserWarning: The palette list has more values (10) than needed (8), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
# Mean number of valid tiles per DAPI site, broken down by cell line / condition.
df_dapi.groupby(['cell_line_cond'])['n_valid_tiles'].mean()
cell_line_cond FUSHeterozygous 8.403153 FUSHomozygous 8.187867 FUSRevertant 7.872268 OPTN 8.783673 TBK1 7.830437 TDP43 8.929780 WT Untreated 9.520752 Name: n_valid_tiles, dtype: float64
# number of valid tiles per site (on average)
# This is the unweighted mean of the per-cell-line means, so every cell line
# counts equally regardless of how many sites it contributes.
import numpy as np
_per_line_mean = df_dapi.groupby(['cell_line_cond'])['n_valid_tiles'].mean()
np.mean(_per_line_mean)
8.503990167417712
# Mean of site_cell_count over all DAPI rows (the column plotted above as
# 'Cellpose Cell Count Average per Site').
df_dapi[['site_cell_count']].mean()
site_cell_count 22.911258 dtype: float64
# number of tiles per site
import ast
# `cells_counts` appears to hold an array printed as text, e.g. "[0 1 2 ...]"
# (values separated by whitespace, not commas) — TODO confirm against the logs.
# Split on any run of whitespace instead of replacing single spaces with
# commas, so padded values ("[ 0  1 10]") and wrapped lines still parse.
_first_counts = df_dapi['cells_counts'].iloc[0]
_tokens = _first_counts.strip().strip('[]').split()
_tile_counts = ast.literal_eval('[' + ','.join(_tokens) + ']')
len(_tile_counts)
64
# Categorical plot of the number of valid tiles per DAPI site for the selected
# batches, with x_title used as the axis label.
plot_catplot(df_dapi,
new_d8_custom_palette,
new_d8_reps,
x='n_valid_tiles',
x_title='valid tiles count',
batches=batches)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1061: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df.loc[:, 'batch_rep'] = df['batch'] + " " + df['rep']
# Heatmaps of the mean cell count per tile: cell lines as rows, panels as
# columns, split by replicate.
plot_hm_of_mean_cell_count_per_tile(df_dapi, split_by='rep', rows='cell_line', columns='panel', vmax=3)
# Mean of cells_count_in_valid_tiles_mean over all DAPI rows — presumably the
# average number of cells per valid tile; confirm how the column is computed.
df_dapi[['cells_count_in_valid_tiles_mean']].mean()
cells_count_in_valid_tiles_mean 1.713208 dtype: float64
# Mean of site_cell_count over all DAPI rows (repeats an earlier cell).
df_dapi[['site_cell_count']].mean()
site_cell_count 22.911258 dtype: float64
# for batch in batches:
# print(batch)
# #batch_num = batch.replace('batch',"")
# run_calc_hist_new(batch,new_d8_cell_lines_for_disp, new_d8_markers, root_directory_raw, root_directory_proc,
# hist_sample=10,sample_size_per_markers=200, ncols=7, nrows=4)
# print("="*30)
# # save notebook as HTML ( the HTML will be saved in the same folder the original script is)
# display(Javascript('IPython.notebook.save_checkpoint();'))
# os.system(f'jupyter nbconvert --to html {NOVA_HOME}/tools/preprocessing_tools/qc_reports/qc_report_new_indi_d8_Opera.ipynb --output {NOVA_HOME}/manuscript/preprocessing_qc_reports/ManuscriptFinalData/qc_report_new_indi_d8_Opera.html')